# Clean up: start from an empty workspace so leftover objects cannot leak in.
# NOTE(review): this removes every object in the current environment; run the
# script in a fresh session if anything there needs to be kept.
rm(list = ls())

#packages
#library(XLConnect)
library(irr)
library(tools)
library(gmodels)
library(maps)
library(foreign)
library(car)
library(memisc)

# Load the master laws file; it is looked up in the current working directory.
# The columns used further down are subject, state, year, pro and scope.
# Earlier locations/readers, kept for reference:
#setwd('/Volumes/MONOGAN/immigration/stateLaws/componentData')
#setwd('/Users/jamie/Documents/immigration/stateLaws/')
#data<-readWorksheetFromFile("immLawsMaster.xls",sheet=1,header=TRUE)
data <- read.csv(file = "immLawsMaster12.csv")

# Helpers used later to pick the pieces out of the vectors that strsplit()
# returns when a combined "state.year" / "state.subject" key is split.

# First element of x (NA when an atomic x has no elements).
first.word <- function(x) {
  x[1L]
}

# Second element of x (NA when an atomic x has fewer than two elements).
second.word <- function(x) {
  x[2L]
}

####RECODES AND WRITE-OUT FIXED DATA####
# Topics: collapse variant and misspelled subject labels into one set.
data$subject <- as.character(data$subject)
table(data$subject)  # labels before recoding

# names  = label as it appears in the raw file
# values = label it is replaced with
# No replacement label is itself a raw label, so one lookup pass is enough.
subject.recode <- c(
  "budgets"                  = "appropriations",
  "education/appropriations" = "education",
  "healthcare"               = "health",
  "law/appropriations"       = "law",
  "license"                  = "licensing",
  "other licenses"           = "licensing",
  "justice"                  = "law",
  "legal services"           = "law",
  "law enforcement"          = "law",
  "ominbus"                  = "omnibus",
  "human trafficking"        = "trafficking",
  "public benefits"          = "benefits"
)
# %in% is FALSE for NA, so missing subjects are left untouched.
needs.recode <- data$subject %in% names(subject.recode)
data$subject[needs.recode] <- unname(subject.recode[data$subject[needs.recode]])

table(data$subject)  # labels after recoding
table(data$subject, data$year)

# State cleanup: drop DC, repair misspelled state names, round pro, and write
# out the cleaned law-level file.
#
# The edits are done on character values. When `state` arrives as a factor
# (the read.csv default before R 4.0.0), editing it in place leaves "DC" and
# the misspelled labels behind as empty levels: they inflate the state count
# printed below and surface as all-NA rows in later by()/table() output.
# Assigning a corrected name that is not yet a level would also yield NA.
state.was.factor <- is.factor(data$state)
data$state <- as.character(data$state)

data <- data[data$state != "DC", ]
data$state[data$state == "Mississipps"] <- "Mississippi"
data$state[data$state == "Massachuessetts"] <- "Massachusetts"

# Hand back the type that was read in, now with only the levels still in use.
if (state.was.factor) {
  data$state <- factor(data$state)
}

# Check: the second value is the number of distinct states remaining.
table(data$state); length(table(data$state))

# pro is rounded to a whole number; it is compared against 1 and 0 below.
# NOTE(review): R's round() sends an exact 0.5 to 0 (round half to even) --
# confirm that is the intended treatment of evenly split codings.
data$pro <- round(data$pro)
write.csv(data, "immLawsMasterClean.csv", row.names = FALSE)

####CREATE POLICY BY STATE###
# One row per state: total scope of pro laws, total scope of con laws, and
# score = log((pro + 1) / (con + 1)).

# Sum `scope` within each entry of `states`.
#   scope  - numeric vector of law scopes
#   state  - state label for each element of `scope`
#   states - the full set of states to report, in output order
# Returns a numeric vector named by `states`. Splitting on a factor with
# explicit levels keeps states that have no rows, and sum() of an empty group
# is 0, so such a state gets a total of 0 (the same treatment the panel and
# policy sections give to missing cells) instead of being dropped or NA.
scope.by.state <- function(scope, state, states) {
  vapply(split(scope, factor(state, levels = states)), sum, numeric(1))
}

# Pro and con totals must share one state list; otherwise a state with laws
# of only one kind would make the two results differ in length or order.
all.states <- sort(unique(as.character(data$state)))
pro.rows <- which(data$pro == 1)  # which() skips rows where pro is NA
con.rows <- which(data$pro == 0)

pro.overall <- as.matrix(scope.by.state(data$scope[pro.rows], data$state[pro.rows], all.states)); pro.overall
con.overall <- as.matrix(scope.by.state(data$scope[con.rows], data$state[con.rows], all.states)); con.overall
state.names <- row.names(pro.overall)
score <- log((pro.overall + 1) / (con.overall + 1))

# Store plain vectors (not one-column matrices) as the data frame columns.
state.aggregate <- data.frame(state.names = state.names, stringsAsFactors = FALSE)
state.aggregate$score <- as.vector(score)
state.aggregate$pro <- as.vector(pro.overall)
state.aggregate$con <- as.vector(con.overall)
# Legacy manual row drop, left disabled:
#state.aggregate<-state.aggregate[-c(8,22,27),]

####CREATE PANEL DATA####
# State-by-year panel of summed scope, separately for pro and con laws.
data$state.year <- paste(data$state, data$year, sep = ".")

# Total scope of pro laws (pro == 1) per state-year; row names carry the key.
pro.rows <- data$pro == 1
pro.panel <- as.data.frame(as.matrix(by(data$scope[pro.rows],
                                        INDICES = list(data$state.year[pro.rows]),
                                        FUN = sum)))
head(pro.panel)
pro.panel$state.year <- rownames(pro.panel)
colnames(pro.panel)[1] <- "pro"

# Same for con laws (pro == 0).
con.rows <- data$pro == 0
con.panel <- as.data.frame(as.matrix(by(data$scope[con.rows],
                                        INDICES = list(data$state.year[con.rows]),
                                        FUN = sum)))
head(con.panel)
con.panel$state.year <- rownames(con.panel)
colnames(con.panel)[1] <- "con"

# Full outer join: keep state-years that have only pro or only con laws.
panel.data <- merge(x = con.panel, y = pro.panel, by = "state.year", all = TRUE)

# Recover state and year from the "state.year" key (split on the literal dot).
key.parts <- strsplit(panel.data$state.year, split = "[.]")
panel.data$state <- as.character(lapply(key.parts, first.word))
panel.data$year <- as.character(lapply(key.parts, second.word))
table(panel.data$state)
table(panel.data$year)

#expand the data
# Every observed state crossed with every observed year, so state-years with
# no laws at all still get a row.
expander <- expand.grid(state = names(table(panel.data$state)),
                        year = as.numeric(names(table(panel.data$year))))
expander$hold <- 1
panel.data <- merge(x = expander, y = panel.data, by = c("state", "year"), all = TRUE)
panel.data$hold <- NULL
panel.data$state.year <- paste(panel.data$state, panel.data$year, sep = ".")

#missings are zero, then make out-of-session missing
#every other year: Montana, Nevada, North Dakota, and Texas.
#Switch: Oregon (2011), Arkansas (2009)
#19 May 2017: http://www.ncsl.org/research/about-state-legislatures/annual-versus-biennial-legislative-sessions.aspx

# A state-year with no recorded laws of a kind has a total of zero.
for (side in c("con", "pro")) {
  panel.data[[side]][is.na(panel.data[[side]])] <- 0
}

# State-years listed here are set to missing rather than zero.
out.of.session <- c(
  'Montana.2006', 'Montana.2008', 'Montana.2010',
  'Montana.2012', 'Montana.2014', 'Montana.2016',
  'Nevada.2006', 'Nevada.2008', 'Nevada.2010',
  'Nevada.2012', 'Nevada.2014', 'Nevada.2016',
  'North Dakota.2006', 'North Dakota.2008', 'North Dakota.2010',
  'North Dakota.2012', 'North Dakota.2014', 'North Dakota.2016',
  'Texas.2006', 'Texas.2008', 'Texas.2010',
  'Texas.2012', 'Texas.2014', 'Texas.2016',
  'Oregon.2006', 'Oregon.2008', 'Oregon.2010',
  'Arkansas.2006', 'Arkansas.2008'
)
no.session <- panel.data$state.year %in% out.of.session
panel.data[no.session, c("con", "pro")] <- NA
panel.data[no.session, ]  # show the rows that were blanked

#compute score
# log ratio of (pro + 1) to (con + 1); NA wherever pro/con were set to NA.
panel.data$score <- with(panel.data, log((pro + 1) / (con + 1)))
panel.data$state.year <- NULL


###CREATE POLICY-SPECIFIC DATA###
####CREATE STATE-BY-POLICY DATA####
# Summed scope of pro and con laws for every state / subject combination.
data$state.policy <- paste(data$state, data$subject, sep = ".")

# Total scope of pro laws (pro == 1) per state-subject key.
pro.rows <- data$pro == 1
pro.policy <- as.data.frame(as.matrix(by(data$scope[pro.rows],
                                         INDICES = list(data$state.policy[pro.rows]),
                                         FUN = sum)))
head(pro.policy)
pro.policy$state.policy <- rownames(pro.policy)
colnames(pro.policy)[1] <- "pro"

# Same for con laws (pro == 0).
con.rows <- data$pro == 0
con.policy <- as.data.frame(as.matrix(by(data$scope[con.rows],
                                         INDICES = list(data$state.policy[con.rows]),
                                         FUN = sum)))
head(con.policy)
con.policy$state.policy <- rownames(con.policy)
colnames(con.policy)[1] <- "con"

# Full outer join: keep keys that have only pro or only con laws.
policy.data <- merge(x = con.policy, y = pro.policy, by = "state.policy", all = TRUE)

# Recover state and policy from the key (split on the literal dot).
# NOTE(review): a subject label containing "." would be cut short here.
key.parts <- strsplit(policy.data$state.policy, split = "[.]")
policy.data$state <- as.character(lapply(key.parts, first.word))
policy.data$policy <- as.character(lapply(key.parts, second.word))
table(policy.data$state)
table(policy.data$policy)

#expand the data
# Every observed state crossed with every observed policy area.
expander <- expand.grid(state = names(table(policy.data$state)),
                        policy = names(table(policy.data$policy)))
expander$hold <- 1
policy.data <- merge(x = expander, y = policy.data, by = c("state", "policy"), all = TRUE)
policy.data <- policy.data[, !(names(policy.data) %in% c("hold", "state.policy")), drop = FALSE]

#missings for anything are zero: the area was never addressed in the data
for (side in c("con", "pro")) {
  policy.data[[side]][is.na(policy.data[[side]])] <- 0
}

#compute policy score
policy.data$score <- with(policy.data, log((pro + 1) / (con + 1)))


####MERGE COVARIATES INTO PANEL DATA####
#load 2013 article data
# Keep ID, squireProfess and termLimits, plus a title-cased copy of State
# stored as `state` so it can serve as the merge key.
past <- read.dta("stateImmig0511.dta")
to.merge <- past[, c("ID", "squireProfess", "termLimits")]
to.merge$state <- toTitleCase(tolower(past$State))

###Change in Foreign Born###
#Source: MPI Data Hub
#Accessed 22 May 2017
#http://www.migrationpolicy.org/programs/data-hub/us-immigration-trends
# Inner join on state: states missing from either file are dropped.
fb <- read.csv("fb.csv")
to.merge <- merge(x = to.merge, y = fb, by = "state")

###CCES Ideology Data##
# Inner join on state and year: panel rows with no CCES match are dropped.
cces <- read.csv("ccesIdeolPanel.csv")
#cces$year<-as.numeric(cces$year)
#panel.data$state<-as.character(panel.data$state)
panel.data <- merge(x = panel.data, y = cces, by = c("state", "year"))

# Checks on the merged panel: size, first rows, and the 2016 rows.
dim(panel.data)
head(panel.data)
panel.data[panel.data$year == 2016, ]

###Per Capita GSP in Chained 2009 Dollars###
#Accessed 22 May 2017 from bea.gov
gsp.wide <- read.csv("pcRealGSP.csv")

# Wide to long: columns X2005..X2016 become one value column, with the digits
# after "X" (sep = "") used as the year.
year.columns <- paste0("X", 2005:2016)
gsp <- reshape(gsp.wide,
               varying = year.columns,
               timevar = "year",
               idvar = "state",
               direction = "long",
               sep = "")
# NOTE(review): the rename is positional and assumes the reshaped value
# column is third (state, year, value) -- confirm against pcRealGSP.csv.
colnames(gsp)[3] <- "gsp"
gsp$gsp <- gsp$gsp / 1000  # rescale to thousands

# Inner join on state and year.
panel.data <- merge(x = panel.data, y = gsp, by = c("state", "year"))

###Unified Party Control###
#ranney4_control: 0=Unified GOP, 0.5=split, 1=Unified Dem
csp <- read.dta("partyControl.dta")

# Defaults first: every year after 2010 starts as split (0.5) and all of NE is
# coded 0; the per-year lists below then overwrite the unified states.
csp$ranney4_control[csp$year > 2010] <- 0.5
csp$ranney4_control[csp$st == "NE"] <- 0

# For each year: states coded unified GOP (0) and unified Dem (1).
unified.control <- list(
  "2011" = list(
    gop = c("AL", "AZ", "FL", "GA", "ID", "IN", "KS", "ME", "MI", "ND", "OH",
            "OK", "PA", "SC", "SD", "TN", "TX", "UT", "WI", "WY"),
    dem = c("AR", "CA", "CT", "DE", "HI", "IL", "MD", "MA", "VT", "WA", "WV")
  ),
  "2012" = list(
    gop = c("AL", "AZ", "FL", "GA", "ID", "IN", "KS", "LA", "ME", "MI", "MS",
            "ND", "OH", "OK", "PA", "SC", "SD", "TN", "TX", "UT", "WI", "WY"),
    dem = c("AR", "CA", "CT", "DE", "HI", "IL", "MD", "MA", "VT", "WA", "WV")
  ),
  "2013" = list(
    gop = c("AL", "AK", "AZ", "FL", "GA", "ID", "IN", "KS", "LA", "MI", "MS",
            "NC", "ND", "OH", "OK", "PA", "SC", "SD", "TN", "TX", "UT", "WI",
            "WY"),
    dem = c("CA", "CO", "CT", "DE", "HI", "IL", "MD", "MA", "MN", "NY", "OR",
            "VT", "WA", "WV")
  ),
  "2014" = list(
    gop = c("AL", "AK", "AZ", "FL", "GA", "ID", "IN", "KS", "LA", "MI", "MS",
            "NC", "ND", "OH", "OK", "PA", "SC", "SD", "TN", "TX", "UT", "WI",
            "WY"),
    dem = c("CA", "CO", "CT", "DE", "HI", "IL", "MD", "MA", "MN", "NY", "OR",
            "RI", "VT", "WA", "WV")
  ),
  "2015" = list(
    gop = c("AL", "AZ", "AR", "FL", "GA", "ID", "IN", "KS", "LA", "MI", "MS",
            "NV", "NC", "ND", "OH", "OK", "SC", "SD", "TN", "TX", "UT", "WI",
            "WY"),
    dem = c("CA", "CT", "DE", "HI", "OR", "RI", "VT")
  ),
  "2016" = list(
    gop = c("AL", "AZ", "AR", "FL", "GA", "ID", "IN", "KS", "LA", "MI", "MS",
            "NV", "NC", "ND", "OH", "OK", "SC", "SD", "TN", "TX", "UT", "WI",
            "WY"),
    dem = c("CA", "CT", "DE", "HI", "OR", "RI", "VT")
  )
)
for (control.year in names(unified.control)) {
  in.year <- csp$year == as.numeric(control.year)
  states <- unified.control[[control.year]]
  csp$ranney4_control[in.year & csp$st %in% states$gop] <- 0
  csp$ranney4_control[in.year & csp$st %in% states$dem] <- 1
}

# 0/1 indicators for unified Democratic and unified Republican control.
csp$demUnif <- as.numeric(csp$ranney4_control == 1)
csp$repUnif <- as.numeric(csp$ranney4_control == 0)
csp <- csp[, c("year", "state", "demUnif", "repUnif")]

# Inner join on state and year.
panel.data <- merge(x = panel.data, y = csp, by = c("state", "year"))
#as.matrix(by(panel.data$demUnif,panel.data$year,sum))
#as.matrix(by(panel.data$repUnif,panel.data$year,sum))

#merge time-invariant data with panel data
panel.data <- merge(x = panel.data, y = to.merge, by = "state")


###Averages and Sums of Over Time of Predictors by State###
# Per-state means of the continuous predictors (NAs ignored).
mean.collapse <- aggregate(panel.data[, c("cces.ideol", "cces.smooth", "gsp")],
                           by = list(state = panel.data$state),
                           FUN = mean, na.rm = TRUE)
to.merge <- merge(x = to.merge, y = mean.collapse, by = "state")

# Per-state sums over time of the unified-control indicators (NAs ignored).
sum.collapse <- aggregate(panel.data[, c("demUnif", "repUnif")],
                          by = list(state = panel.data$state),
                          FUN = sum, na.rm = TRUE)
to.merge <- merge(x = to.merge, y = sum.collapse, by = "state")

# The first column of state.aggregate holds the state name; give it the name
# of the merge key.
names(state.aggregate)[1] <- "state"

#merge into cross-sectional data sets
state.aggregate <- merge(x = to.merge, y = state.aggregate, by = "state")
policy.data <- merge(x = to.merge, y = policy.data, by = "state")



####WRITE OUT FILES####
# One CSV per data set, written to the working directory without row names.
write.csv(state.aggregate, file = "stateAggregate.csv", row.names = FALSE)
write.csv(panel.data, file = "panelImmig.csv", row.names = FALSE)
write.csv(policy.data, file = "policyArea.csv", row.names = FALSE)

